{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 三招解决数据连接问题"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. 导入工具"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. 读入数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 指明数据文件的路径\n",
    "xlsales = r'data/sales/销售总表.xlsx'\n",
    "xlsurvey = r'data/account/测绘发票.xlsx'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2.1 读入 **销售总表**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>房号</th>\n",
       "      <th>客户</th>\n",
       "      <th>签约日期</th>\n",
       "      <th>签约面积</th>\n",
       "      <th>签约金额</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>sd-4.2.901</td>\n",
       "      <td>钱甲</td>\n",
       "      <td>2022-09-05</td>\n",
       "      <td>89</td>\n",
       "      <td>445000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>sd-5.1.301</td>\n",
       "      <td>赵乙</td>\n",
       "      <td>2021-03-04</td>\n",
       "      <td>89</td>\n",
       "      <td>489500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>sd-7.1.501</td>\n",
       "      <td>刘丙</td>\n",
       "      <td>2022-07-13</td>\n",
       "      <td>108</td>\n",
       "      <td>572400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>sd-7.2.2002</td>\n",
       "      <td>张丁</td>\n",
       "      <td>2023-05-09</td>\n",
       "      <td>108</td>\n",
       "      <td>529200</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            房号  客户       签约日期  签约面积    签约金额\n",
       "0   sd-4.2.901  钱甲 2022-09-05    89  445000\n",
       "1   sd-5.1.301  赵乙 2021-03-04    89  489500\n",
       "2   sd-7.1.501  刘丙 2022-07-13   108  572400\n",
       "3  sd-7.2.2002  张丁 2023-05-09   108  529200"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "销售总表 = pd.read_excel(xlsales)\n",
    "销售总表"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2.2 读入 **测绘发票**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>房号</th>\n",
       "      <th>测绘面积</th>\n",
       "      <th>发票代码</th>\n",
       "      <th>发票号码</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>sd-5.1.301</td>\n",
       "      <td>89.01</td>\n",
       "      <td>36002200102</td>\n",
       "      <td>98429813</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>sd-7.1.501</td>\n",
       "      <td>108.00</td>\n",
       "      <td>36002200103</td>\n",
       "      <td>47837348</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>sd-4.2.901</td>\n",
       "      <td>89.01</td>\n",
       "      <td>36002200103</td>\n",
       "      <td>50918623</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>sd-7.2.2002</td>\n",
       "      <td>108.00</td>\n",
       "      <td>36002200983</td>\n",
       "      <td>29839742</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            房号    测绘面积         发票代码      发票号码\n",
       "0   sd-5.1.301   89.01  36002200102  98429813\n",
       "1   sd-7.1.501  108.00  36002200103  47837348\n",
       "2   sd-4.2.901   89.01  36002200103  50918623\n",
       "3  sd-7.2.2002  108.00  36002200983  29839742"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "测绘发票 = pd.read_excel(xlsurvey)\n",
    "测绘发票"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. 连接数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "result = pd.merge(\n",
    "    left=销售总表,\n",
    "    right=测绘发票,\n",
    "    on='房号',\n",
    "    how='left'\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<img alt=\"images/销售总表与测绘发票连接表.png\" src=\"images/销售总表与测绘发票连接表.png\">"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>房号</th>\n",
       "      <th>客户</th>\n",
       "      <th>签约日期</th>\n",
       "      <th>签约面积</th>\n",
       "      <th>签约金额</th>\n",
       "      <th>测绘面积</th>\n",
       "      <th>发票代码</th>\n",
       "      <th>发票号码</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>sd-4.2.901</td>\n",
       "      <td>钱甲</td>\n",
       "      <td>2022-09-05</td>\n",
       "      <td>89</td>\n",
       "      <td>445000</td>\n",
       "      <td>89.01</td>\n",
       "      <td>36002200103</td>\n",
       "      <td>50918623</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>sd-5.1.301</td>\n",
       "      <td>赵乙</td>\n",
       "      <td>2021-03-04</td>\n",
       "      <td>89</td>\n",
       "      <td>489500</td>\n",
       "      <td>89.01</td>\n",
       "      <td>36002200102</td>\n",
       "      <td>98429813</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>sd-7.1.501</td>\n",
       "      <td>刘丙</td>\n",
       "      <td>2022-07-13</td>\n",
       "      <td>108</td>\n",
       "      <td>572400</td>\n",
       "      <td>108.00</td>\n",
       "      <td>36002200103</td>\n",
       "      <td>47837348</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>sd-7.2.2002</td>\n",
       "      <td>张丁</td>\n",
       "      <td>2023-05-09</td>\n",
       "      <td>108</td>\n",
       "      <td>529200</td>\n",
       "      <td>108.00</td>\n",
       "      <td>36002200983</td>\n",
       "      <td>29839742</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            房号  客户       签约日期  签约面积    签约金额    测绘面积         发票代码      发票号码\n",
       "0   sd-4.2.901  钱甲 2022-09-05    89  445000   89.01  36002200103  50918623\n",
       "1   sd-5.1.301  赵乙 2021-03-04    89  489500   89.01  36002200102  98429813\n",
       "2   sd-7.1.501  刘丙 2022-07-13   108  572400  108.00  36002200103  47837348\n",
       "3  sd-7.2.2002  张丁 2023-05-09   108  529200  108.00  36002200983  29839742"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. 生成连接表"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 创建输出目录函数\n",
    "def create_outpath():\n",
    "    # 时序目录名\n",
    "    from pathlib import Path\n",
    "    import datetime\n",
    "    oufp = datetime.datetime.today().strftime('%Y%m%d%H%M%S')\n",
    "    this_dir = Path('.')\n",
    "    oufp = (this_dir / oufp)\n",
    "    \n",
    "    if not Path.exists(oufp):\n",
    "        Path.mkdir(oufp)\n",
    "        return oufp\n",
    "    else:\n",
    "        return False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "成功生成 : 含测绘发票的销售总表.xlsx\n"
     ]
    }
   ],
   "source": [
    "# 指明生成的 Excel 文件的路径\n",
    "path = create_outpath()\n",
    "if path is not False:\n",
    "    ouf = ( path / r'含测绘发票的销售总表.xlsx')\n",
    "    \n",
    "    # 将含测绘发票的销售总表输出到 Excel 文件\n",
    "    try:\n",
    "        result.to_excel(ouf, index=False)\n",
    "        print('成功生成 : 含测绘发票的销售总表.xlsx')\n",
    "    except:\n",
    "        print('生成失败!')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
